{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# <b>S3 Test Notebook</b>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Here you have to set values of environment variables\n",
    "\n",
    "os.environ['BLAZINGSQL_E2E_AWS_S3_ACCESS_KEY_ID']=''\n",
    "os.environ['BLAZINGSQL_E2E_AWS_S3_SECRET_KEY']=''\n",
    "\n",
    "os.environ['BLAZINGSQL_E2E_AWS_S3_VIRGINIA_PUBLIC']=''\n",
    "os.environ['BLAZINGSQL_E2E_AWS_S3_VIRGINIA_AES']=''\n",
    "os.environ['BLAZINGSQL_E2E_AWS_S3_VIRGINIA_KMS']=''\n",
    "\n",
    "os.environ['BLAZINGSQL_E2E_AWS_S3_OREGON_PUBLIC']=''\n",
    "os.environ['BLAZINGSQL_E2E_AWS_S3_OREGON_AES']=''\n",
    "os.environ['BLAZINGSQL_E2E_AWS_S3_OREGON_KMS']=''\n",
    "\n",
    "os.environ['BLAZINGSQL_E2E_AWS_S3_KMS_ID_VIRGINIA']=''\n",
    "os.environ['BLAZINGSQL_E2E_AWS_S3_KMS_ID_OREGON']=''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Getting values of environment variables\n",
    "\n",
    "access_key_id = os.getenv(\"BLAZINGSQL_E2E_AWS_S3_ACCESS_KEY_ID\")\n",
    "secret_key = os.getenv(\"BLAZINGSQL_E2E_AWS_S3_SECRET_KEY\")\n",
    "\n",
    "blazingsql_bucket = os.getenv(\"BLAZINGSQL_E2E_AWS_S3_VIRGINIA_PUBLIC\")\n",
    "blazingsql_bucket_aes = os.getenv(\"BLAZINGSQL_E2E_AWS_S3_VIRGINIA_AES\")\n",
    "blazingsql_bucket_aws_kms = os.getenv(\"BLAZINGSQL_E2E_AWS_S3_VIRGINIA_KMS\")\n",
    "\n",
    "blazingsql_bucket_us_west2 = os.getenv(\"BLAZINGSQL_E2E_AWS_S3_OREGON_PUBLIC\")\n",
    "blazingsql_bucket_us_west2_aes = os.getenv(\"BLAZINGSQL_E2E_AWS_S3_OREGON_AES\")\n",
    "blazingsql_bucket_us_west2_aws_kms = os.getenv(\"BLAZINGSQL_E2E_AWS_S3_OREGON_KMS\")\n",
    "\n",
    "kms_id_virginia = os.getenv(\"BLAZINGSQL_E2E_AWS_S3_KMS_ID_VIRGINIA\")\n",
    "kms_id_oregon = os.getenv(\"BLAZINGSQL_E2E_AWS_S3_KMS_ID_OREGON\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## <b>Init Context</b>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import dask"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dask.distributed import Client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ip_port_dask_scheduller = 'SCHEDULER_IP:8786'\n",
    "network_interface = 'ens5' "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "client = Client(ip_port_dask_scheduller)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from blazingsql import BlazingContext"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc = BlazingContext(dask_client = client, network_interface='ens5')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## <b>Register and create tables in Virginia region</b>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<b>Buckets</b>\n",
    "\n",
    "* blazingsql-bucket\n",
    "* blazingsql-bucket-aes-256\n",
    "* blazingsql-bucket-aws-kms\n",
    "\n",
    "<b>Tablas</b>\n",
    "* nation\n",
    "* region\n",
    "* customer\n",
    "* lineitem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from blazingsql import S3EncryptionType"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "authority = \"tpch_s3_virginia\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc.s3(authority, bucket_name=blazingsql_bucket, encryption_type=S3EncryptionType.NONE,\n",
    "      access_key_id=access_key_id, secret_key=secret_key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dir_data_lc = \"s3://\" + authority + \"/\" + \"DataSet100Mb2part/\" + \"tpch/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ext = \"parquet\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "table_files_nation = (\"%s/%s_[0-9]*.%s\") % (dir_data_lc, \"nation\", ext)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "table_files_region = (\"%s/%s_[0-9]*.%s\") % (dir_data_lc, \"region\", ext)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"nation files: \" + table_files_nation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc.create_table(\"nation\", table_files_nation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc.create_table(\"region\", table_files_region)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"\"\"select n1.n_nationkey as n1key, n2.n_nationkey as n2key, n1.n_nationkey + n2.n_nationkey \n",
    "from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 \n",
    "where n1.n_nationkey < 10 and n1.n_nationkey > 5\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = bc.sql(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(result.compute())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "authority = \"tpch_s3_virginia_aes\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc.s3(authority, bucket_name=blazingsql_bucket_aes,\n",
    "      access_key_id=access_key_id,\n",
    "      secret_key=secret_key,\n",
    "      encryption_type=S3EncryptionType.AES_256)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dir_data_lc = \"s3://\" + authority + \"/\" + \"DataSet100Mb2part/\" + \"tpch/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "table_files_customer = (\"%s/%s_[0-9]*.%s\") % (dir_data_lc, \"customer\", ext)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"customer files: \" + table_files_customer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc.create_table(\"customer\", table_files_customer)\n",
    "print(\"customer table created!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"\"\"select * from customer limit 10\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = bc.sql(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(result.compute())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "authority = \"tpch_s3_virginia_kms\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc.s3(authority, bucket_name=blazingsql_bucket_aws_kms,\n",
    "      access_key_id=access_key_id,\n",
    "      secret_key=secret_key,\n",
    "      encryption_type=S3EncryptionType.AWS_KMS,\n",
    "      kms_key_amazon_resource_name=kms_id_virginia)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dir_data_lc = \"s3://\" + authority + \"/\" + \"DataSet100Mb2part/\" + \"tpch/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "table_files_lineitem = (\"%s/%s_[0-9]*.%s\") % (dir_data_lc, \"lineitem\", ext)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc.create_table(\"lineitem\", table_files_lineitem)\n",
    "print(\"lineitem table created!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"\"\"select * from lineitem limit 10\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = bc.sql(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(result.compute())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## <b>Register and create tables in Oregon region</b>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<b>Buckets</b>\n",
    "\n",
    "* blazingsql-bucket-us-west-2\n",
    "* blazingsql-bucket-us-west-2-aes-256\n",
    "* blazingsql-bucket-us-west-2-aws-kms\n",
    "\n",
    "<b>Tablas</b>\n",
    "\n",
    "* part\n",
    "* partsupp\n",
    "* supplier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "authority = \"tpch_s3_oregon\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc.s3(authority, bucket_name=blazingsql_bucket_us_west2, \n",
    "      encryption_type=S3EncryptionType.NONE,\n",
    "      access_key_id=access_key_id,\n",
    "      secret_key=secret_key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dir_data_lc = \"s3://\" + authority + \"/\" + \"DataSet100Mb2part/\" + \"tpch/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "table_files_part = (\"%s/%s_[0-9]*.%s\") % (dir_data_lc, \"part\", ext)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"part files: \" + table_files_part)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc.create_table(\"part\", table_files_part)\n",
    "print(\"part table created!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"select * from part limit 20\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = bc.sql(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(result.compute())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "authority = \"tpch_s3_oregon_aes\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc.s3(authority, bucket_name=blazingsql_bucket_us_west2_aes,\n",
    "      access_key_id=access_key_id,\n",
    "      secret_key=secret_key,\n",
    "      encryption_type=S3EncryptionType.AES_256)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dir_data_lc = \"s3://\" + authority + \"/\" + \"DataSet100Mb2part/\" + \"tpch/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "table_files_partsupp = (\"%s/%s_[0-9]*.%s\") % (dir_data_lc, \"partsupp\", ext)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"partsupp files: \" + table_files_partsupp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc.create_table(\"partsupp\", table_files_partsupp)\n",
    "print(\"partsupp table created!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"\"\"select * from partsupp limit 10\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = bc.sql(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(result.compute())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "authority = \"tpch_s3_oregon_kms\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc.s3(authority, bucket_name=blazingsql_bucket_us_west2_aws_kms,\n",
    "      access_key_id=access_key_id,\n",
    "      secret_key=secret_key,\n",
    "      encryption_type=S3EncryptionType.AWS_KMS,\n",
    "      kms_key_amazon_resource_name=kms_id_oregon)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dir_data_lc = \"s3://\" + authority + \"/\" + \"DataSet100Mb2part/\" + \"tpch/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "table_files_orders = (\"%s/%s_[0-9]*.%s\") % (dir_data_lc, \"orders\", ext)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"orders files: \" + table_files_orders)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc.create_table(\"orders\", table_files_orders)\n",
    "print(\"orders table created!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"\"\"select * from orders limit 20\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = bc.sql(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(result.compute())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# <b>Queries with created tables with data from different regions</b>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"\"\"\n",
    "                select\n",
    "                    l.l_orderkey,\n",
    "                    sum(l.l_extendedprice*(1-l.l_discount)) as revenue,\n",
    "                    o.o_orderdate,\n",
    "                    o.o_shippriority\n",
    "                from\n",
    "                    customer c\n",
    "                    inner join orders o \n",
    "                    on c.c_custkey = o.o_custkey\n",
    "                    inner join lineitem l\n",
    "                    on l.l_orderkey = o.o_orderkey\n",
    "                where\n",
    "                    c.c_mktsegment = 'BUILDING'\n",
    "                    and o.o_orderdate < date '1995-03-15'\n",
    "                    and l.l_shipdate > date '1995-03-15'\n",
    "                group by\n",
    "                    l.l_orderkey,\n",
    "                    o.o_orderdate,\n",
    "                    o.o_shippriority\n",
    "                order by\n",
    "                    revenue desc,\n",
    "                    o.o_orderdate\n",
    "                limit 10\n",
    "            \"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = bc.sql(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(result.compute())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"\"\"\n",
    "                select\n",
    "                    c.c_custkey,\n",
    "                    c.c_name,\n",
    "                    sum(l.l_extendedprice * (1 - l.l_discount)) as revenue,\n",
    "                    c.c_acctbal,\n",
    "                    n.n_name,\n",
    "                    c.c_address,\n",
    "                    c.c_phone,\n",
    "                    c.c_comment\n",
    "                from\n",
    "                    customer c\n",
    "                    inner join orders o\n",
    "                    on c.c_custkey = o.o_custkey\n",
    "                    inner join lineitem l\n",
    "                    on l.l_orderkey = o.o_orderkey\n",
    "                    inner join nation n\n",
    "                    on c.c_nationkey = n.n_nationkey\n",
    "                where\n",
    "                    o.o_orderdate >= date '1993-10-01'\n",
    "                    and o.o_orderdate < date '1993-10-01' + interval '3' month\n",
    "                    and l.l_returnflag = 'R'\n",
    "                group by\n",
    "                    c.c_custkey,\n",
    "                    c.c_name,\n",
    "                    c.c_acctbal,\n",
    "                    c.c_phone,\n",
    "                    n.n_name,\n",
    "                    c.c_address,\n",
    "                    c.c_comment\n",
    "                order by\n",
    "                    revenue desc\n",
    "                limit 20\n",
    "            \"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = bc.sql(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(result.compute())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Rapids Stable",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}